import logging
from init import PATHS
LOGGER = logging.getLogger(__name__)
import pandas as pd


def main():
    """Export the NAICS columns of the Orbis NAICS 336111 extract to CSV.

    Reads the 'Results' sheet of the manually downloaded Orbis workbook,
    fills in 'BvD ID number' on every row via identify_rows, then writes that
    identifier together with every column whose name contains 'NAICS'.
    """
    workbook = PATHS.manualorbis / 'NAICS336111_BasicInfo.xlsx'
    orbis = pd.read_excel(workbook, sheet_name='Results')

    # A firm can span several rows and only the first of them carries the
    # identifier (see identify_rows), so the IDs are filled in before the
    # NAICS columns are split off.
    orbis = identify_rows(orbis)

    naics_columns = []
    for column in orbis.columns:
        if 'NAICS' in column:
            naics_columns.append(column)

    naics_by_firm = orbis[['BvD ID number', *naics_columns]]

    # NOTE: to_csv is called with its defaults, so the DataFrame index is
    # written as the first (unnamed) column of the output file.
    destination = PATHS.dropbox / 'Data_outputted/A_AutoIndustry/NAICS336111_primarysecondaryNAICS.csv'
    naics_by_firm.to_csv(destination)


def identify_rows(df_OrbPlat):
    """Carry each firm's 'BvD ID number' down onto its continuation rows.

    In the Orbis export a firm with several entries for some variable
    occupies several consecutive rows, but only the first of those rows
    carries the firm's identifier. Rows are therefore attributed to firms by
    their order: every row with a missing 'BvD ID number' receives the most
    recent non-missing value above it.

    The fill is done by row order rather than by index label, so it works for
    any index, not only the default 0..n-1 RangeIndex. Rows that precede the
    first identifier stay missing.

    Args:
        df_OrbPlat: DataFrame containing a 'BvD ID number' column. The column
            is overwritten in place.

    Returns:
        The same DataFrame object, with 'BvD ID number' forward-filled.

    Raises:
        KeyError: if the 'BvD ID number' column is absent.
    """
    id_column = 'BvD ID number'
    ids = df_OrbPlat[id_column]
    filled = ids.ffill()

    # One summary record replaces the former per-firm print to stdout.
    logging.getLogger(__name__).debug(
        "identify_rows: %d firm rows, %d continuation rows filled",
        int(ids.notnull().sum()),
        int(filled.notnull().sum() - ids.notnull().sum()),
    )

    df_OrbPlat[id_column] = filled
    return df_OrbPlat

